library(tidyverse)
library(ggthemes)
library(viridis)
library(gganimate)

Data wrangling

global_mortality <- here::here("week_3/data", "global_mortality.xlsx") %>%
  readxl::read_xlsx(.)

Clean up the names for easy access but save the names for pretty plotting.

names(global_mortality) <- str_trim(
  str_remove_all(names(global_mortality), "[[:punct:]]")
)
causes <- colnames(global_mortality)[ -c(1, 2, 3)]

Wide to tall so that we can group by countries or causes of death etc.

mortality_tall <- tidyr::gather(
  data = global_mortality,
  key = "cause",
  value = "percent",
  -c(country, countrycode, year),
  factor_key = TRUE
  )

Sum by year for each country.

mortality_country <- group_by(mortality_tall, country, cause) %>%
  summarise(percent = sum(percent, na.rm = TRUE))

Rows where country code is NA are actually groups of countries rather than a single country. The groups are overlapping (e.g., countries are grouped according to geographic region as well as SDI). We can explore each group separately.

regions <- c(
  "Andean Latin America",
  "Central Latin America",
  "Tropical Latin America",
  "Latin America and Caribbean",
  "Southern Latin America",
  "North America",
  "Caribbean",
  "Australasia",
  "Oceania",
  "East Asia",
  "South Asia",
  "Central Asia",
  "Southeast Asia",
  "Eastern Europe",
  "Central Europe",
  "Western Europe",
  "North Africa and Middle East",
  "Eastern Sub-Saharan Africa",
  "Central Sub-Saharan Africa",
  "Western Sub-Saharan Africa",
  "Southern Sub-Saharan Africa",
  "Sub-Saharan Africa"
)

country_groups <- filter(mortality_tall, country %in% regions) %>%
    droplevels()

Re-arrange country groups geographically.

country_groups$country <- factor(
  country_groups$country,
  levels = regions
)

Leading causes of death in each geographical region

p <- ggplot(country_groups, aes(country, cause, frame = year)) +
  geom_tile(aes(fill = percent)) +
  scale_fill_viridis(name = "% Population") +
  theme_tufte() +
  theme(axis.text.x = element_text(angle = 90)) +
  xlab("County Group") + ylab("Cause") +
  theme(legend.title = element_blank())

gganimate(p, interval = 2)

Leading causes of death by SDI

sdi <- c(
  "High SDI",
  "High-middle SDI",
  "Middle SDI",
  "Low-middle SDI",
  "Low SDI"
)

sdi_groups <- filter(mortality_tall, country %in% sdi) %>%
  droplevels()

sdi_groups$country <- factor(
  sdi_groups$country,
  levels = sdi
)

p2 <- ggplot(sdi_groups, aes(country, cause, frame = year)) +
  geom_tile(aes(fill = percent)) +
  scale_fill_viridis(name = "% Population") +
  theme_tufte() +
  theme(axis.text.x = element_text(angle = 90)) +
  xlab("") + ylab("Cause")

gganimate(p2)

Leading causes of death in the UK

uk <- c("England", "Northern Ireland", "Scotland", "Wales")
uk_groups <- filter(mortality_tall, country %in% uk) %>%
  droplevels()

Let us look at the top 10 causes rather than the whole data set.

top_10 <- group_by(uk_groups, country, year) %>%
    top_n(n = 10, wt = percent)  %>%
    droplevels() %>%
    pull(cause) %>% unique

uk_groups <- filter(uk_groups, cause %in% top_10) %>% droplevels()
uk_groups$country <- factor(
  uk_groups$country,
  levels = uk
)

p3 <- ggplot(uk_groups, aes(percent, cause, col = country, frame = year)) +
      geom_point(aes(size = percent)) + theme_tufte()

gganimate(p3, interval = 2)